#!/bin/bash
###################################################################################
## run1raw_cohort.sh - a bash script which runs the SAS programs in the correct order,
##                    calling SAS or Stata. In addition, it will send you an email if the programs crash,
##                    and it provides several other automations. In particular, it allows parameters to be defined in one file and then passed to the SAS or Stata files.
##                    You can actually call this file anything you want; it is smart enough to create a log under the same name. The bash helper is found in
##                    SASmacros/runcode.sh and is sourced with $LOGNAME as its argument
##
##                    The programs sequenced here 
###################################################################################

###################################################################################
## USER INSTRUCTIONS (TODOS)
##
## TODO1. Users need to make sure that they have properly defined the directories 
##    at the top of the dirInit.sas and dirInit.do programs
##
## TODO2. SET EMAILON to 1 or 0 depending on whether you want to receive an email on crash
## 
## TODO3. Next, users need to ensure that parameters are defined correctly for the cohort they want to
##    define. The section has several variables, some of which control the different restrictions the
##    programs place on the cohorts. Everything should be explained in this section. The bash program
##    passes all of these parameters to the SAS programs.
##    NOTE: users need to define the ICD9 OR HCPCS codes they are using in the 1cohort_extraction/1A_flag_candidate_evnt UNDER
##           HCPCScohrtDef.sas or/and ICD9cohrtDef.sas files. The cohorts defined there should match the COHRT NAME as well.
##
## TODO4. Finally, the user needs to make sure this program is running all of the programs of interest,
##    or comment out and/or add other programs for this master program to run
##
## TODO5. To run this bash script from the terminal, type in "bash run1raw_cohort_PK.sh"
##
## TODO6. MAKE SURE YOU ONLY RUN PROGRAMS WHICH YOUR DUA COVERS!
##
## TODO7. RUN THIS PROGRAM FROM TERMINAL 'nohup bash run1raw_cohort_PK.sh & '
###################################################################################

## DERIVE THE LOG NAME FROM THIS SCRIPT'S OWN FILE NAME (script name minus its extension),
## SO THE LOG FOLLOWS WHATEVER THE SCRIPT IS CALLED.
## NOTE(review): LOGNAME is also the POSIX login-name environment variable. If it is
## already exported in the calling environment, every child process started from this
## script sees the overwritten value. The name is kept because it is used throughout
## this script and is passed to runcode.sh.
LOGNAMESH=$(basename -- "$0")
LOGNAME="${LOGNAMESH%.*}"

## IMPORT FUNCTION -'runcode'  WHICH IS USED TO RUN SAS/STATA PRGRMS AND AUTOMATE LOGS AND ERROR CHECKING
## The path is relative, so this script must be started from the directory that contains SASmacros/.
## Stop right away if the helper cannot be read: every step below calls runcode, and without
## it each call would fail with "command not found" while the run still reports as finished.
if [[ ! -r SASmacros/runcode.sh ]]; then
    echo "ERROR: cannot read SASmacros/runcode.sh (run this script from the project root)" >&2
    exit 1
fi
source SASmacros/runcode.sh "$LOGNAME"


##------------------------------------------------------------------------------
## TODO2: DO YOU WANT THE PROGRAM TO EMAIL YOU ON CRASH - SET TO 1 FOR YES
##------------------------------------------------------------------------------
## EMAIL WILL BE SENT TO YOUR NBER EMAIL

## 1 = send an email when the run crashes, 0 = do not.
EMAILON=1

## START A FRESH LOG: remove any log left over from a previous run, then write the
## host name as the first line. The redirect creates the file, so no separate
## touch is needed. Options are placed before the file name so rm does not depend
## on GNU option reordering.
rm -f -- "$LOGNAME.log"
echo "RUNNING PROGRAM ON THIS SERVER:: $(uname -n)" > "$LOGNAME.log"


##------------------------------------------------------------------------------
## TODO3: DEFINE VARIABLES TO BE PASSED ONTO SAS MACRO WITHIN EACH PROGRAMS
##------------------------------------------------------------------------------
## I have defined loops here - you should just need to put your percent and cohort names in there.
# Note that I have not created harmonized 01 files.
# Only choose from  0001 05 20 100

## MAIN LOOP: for every sample percent (p) and every cohort label (cohrt), set the
## parameters, verify the cohort is defined, run the SAS programs in order via
## 'runcode', and email a per-loop completion notice.
for p in "20"; do
    for cohrt in stk; do
        ##----------------------------------------------------------------------
        ## PARAMETERS USED BY ALL FILES
        ##----------------------------------------------------------------------
        ## COHORT LABEL USED TO SAVE DATASETS - YOU CREATE THIS.
        ##   SHOULD MATCH THAT FOUND IN 1cohort_extraction/1A_flag_candidate_evnt UNDER
        ##   THE FOLLOWING FILES HCPCScohrtDef.sas or/and ICD9cohrtDef.sas
        COHRT=$cohrt

        ## THE FIRST YEAR OF RAW DATA YOU WANT TO PROCESS :: CAR/OP/MEDPAR/IP MAY ALL HAVE DIFFERENT START DATES
        ## DIFFERENT PERCENTS ALSO HAVE DIFFERENT START DATES, SO EACH PERCENT KEEPS ITS OWN ARM.
        ## The percent labels are matched as strings. With the numeric -eq test, "0001" and "01"
        ## both evaluate to 1 (leading zeros are read as octal) and cannot be told apart.
        ## NOTE(review): IPYEARIN and CARYEARIN are not assigned anywhere in this file; define
        ## them here before enabling the ip_*/carrier_* programs further down.
        case "$p" in
            0001)
                MEDYEARIN=1999
                OPYEARIN=1999
                ;;
            01)
                MEDYEARIN=1999
                OPYEARIN=1999
                ;;
            05)
                MEDYEARIN=1999
                OPYEARIN=1999
                ;;
            20)
                MEDYEARIN=1999
                OPYEARIN=1999
                ;;
            100)
                MEDYEARIN=1999
                OPYEARIN=1999
                ;;
            *)
                ## An unknown percent would otherwise run with empty start years, or with
                ## the values left over from the previous loop iteration.
                echo "UNSUPPORTED PCT '$p' -- CHOOSE FROM 0001 01 05 20 100" | tee -a "$LOGNAME.log" >&2
                exit 1
                ;;
        esac

        ## THE LAST YEAR OF DATA YOU WANT TO PROCESS
        YEAROUT=2015
        ## NOTE TO BE PLACED IN THE LOG FILE ARCHIVES
        NOTE=SHRUTHI_AMI_RUN_NEW
        ## SIZE OF FILES YOU ARE RUNNING
        PCT=$p
        ## WINDOW OF CLAIMS TO PULL BEFORE AND AFTER INDEX EVENT DATE - IN YEARS
        WINYR=1
        ## STAT-TRANSFER THE FILES FROM SAS TO STATA (1=YES) - ADDS A LOT OF PROCESSING TIME
        CONVERT2STATA=1

        ##----------------------------------------------------------------------
        ## 1B PRGM PARAMETERS -- RESTRICTIONS
        ##----------------------------------------------------------------------
        ## ~~~ VARIABLES DENOTING WHICH CLAIMS FILES ARE USED TO DEFINE AN INDEX EVENT
        ##     A ZERO HERE TURNS OFF THE DXN AND PROC BELOW
        ##     YOU ALSO NEED TO MAKE SURE TO RUN THE CORRECT 1A PRGMS BELOW. FOR EXAMPLE
        ##     IF I PLANNED TO USE THE CARRIER FILE THEN THE
        ##     runcode 1cohort_extraction/1A_flag_candidate_evnt carrier_candidates.sas
        ##     PRGM SHOULD BE UNCOMMENTED AND THE 'CAR' VARIABLE BELOW SHOULD BE SET TO 1
        IP=0
        MED=1
        CAR=0
        OP=0

        ## ~~~ For each claim used denote the number of procedure or diagnostic codes used
        ##     (for ip_prcPos=2 use the first 2 procedure codes from IP file)

        ## ~~~ DIAGNOSTIC VARIABLES USED (i.e. a 2 would loop through the 1st and 2nd dgn vars -
        ##     set to 1 if you only want the principal diagnosis)
        IP_DXN=0
        ## The "hk" cohort is identified from MEDPAR procedure codes; every other cohort
        ## from the MEDPAR principal diagnosis.
        if [[ "$cohrt" == "hk" ]]; then
            MED_DXN=0
            MED_PROC=25
        else
            MED_DXN=1
            MED_PROC=0
        fi
        CAR_DXN=0
        OP_DXN=0

        ## ~~~ PROCEDURE CODES USED (i.e. 2 would loop through the 1st and 2nd proc vars - 0 ignores them)
        IP_PROC=0
        OP_PROC=0

        ## ~~~ NUMBER OF DAYS WHICH MUST PASS BETWEEN A REPEAT INDEX EVENT, FOR A SINGLE BENEFICIARY
        ##     BEFORE THE CLAIM CAN BE CONSIDERED A NEW INDEX EVENT
        DAYS_BETWEEN_NEWINDEX_EVENT=365

        ## ~~~ ENROLMENT RESTRICTIONS - NUMBER OF MONTHS ENROLLED IN BOTH PART A AND B FEE-FOR-SERVICE
        ##     (POST INDEX EVENT / PRE INDEX EVENT)
        FFSmonthpost=12
        FFSmonthpre=12
        ## ~~~ HMO ENROLMENT RESTRICTIONS -- NUMBER OF MONTHS NOT ENROLLED IN HMO
        HMOmonthpost=12
        HMOmonthpre=12
        ##----------------------------------------------------------------------
        ## <END> DEFINE VARIABLES TO BE PASSED ONTO SAS MACRO WITHIN EACH PROGRAMS
        ##----------------------------------------------------------------------

        ##----------------------------------------------------------------------
        ## CHECK TO MAKE SURE COHORT HAS BEEN DEFINED CORRECTLY -- AUTOMATED
        ##----------------------------------------------------------------------
        ## The cohort label must appear in at least one of the two definition files.
        ## Both lookups expand $COHRT; a single-quoted '$COHRT' would search for that
        ## literal text and never match the label. A missing file counts as "not found".
        ## NOTE(review): the 'defintion' spelling of the HCPCS file name is kept exactly as
        ## it was; confirm it matches the file on disk.
        icd9_def_file='1cohort_extraction/1A_flag_candidate_evnt/icd9_chrt_definition.sas'
        hcpcs_def_file='1cohort_extraction/1A_flag_candidate_evnt/hcpcs_chrt_defintion.sas'
        if ! grep -qF -- "$COHRT" "$icd9_def_file" && ! grep -qF -- "$COHRT" "$hcpcs_def_file"; then
            echo "#####  COHRT NOT DEFINED CORRECTLY !! ---  #########################################"  >> "$LOGNAME.log"
            echo "COMPARE THE COHRT VARIABLE IN $LOGNAME.sh AND THE COHRT DEFINITIONS FOUND IN 1cohort_extraction/1A_flag_candidate_evnt/*definition.sas" >> "$LOGNAME.log"
            if [[ "${EMAILON:-0}" -eq 1 ]]; then
                echo "CRASH SENDING EMAIL"
                ## mail reads the message body from stdin; give it one so it never
                ## waits on the terminal.
                echo "COHRT '$COHRT' NOT DEFINED CORRECTLY. SEE $LOGNAME.log ON $(uname -n)" \
                    | mail -s "MEDICARE CODE CRASH: COHORTS NOT DEFINED CORRECTLY" "$(whoami)@nber.org"
            fi
            echo "COHRT NOT DEFINED CORRECTLY" >&2
            ## Non-zero status: this is a failed run.
            exit 1
        fi
        ##----------------------------------------------------------------------

        ##----------------------------------------------------------------------
        ## TODO4: REVIEW FOLLOWING LIST OF PROGRAMS TO BE RUN -- COMMENT OUT PRGMS WHICH
        ##        DO NOT COVER DUA OR WHICH YOU HAVE NO INTEREST IN RUNNING.
        ##----------------------------------------------------------------------
        ## Every program receives one comma-separated parameter string as the third
        ## argument of runcode. The leading and trailing parts are identical for all
        ## programs, so they are built once here.
        ## DO NOT SWITCH ORDER OF VARIABLES: cohrt,pct,yearin,yearout,NOTE,clmDTA,winyear,<program specific>
        ARGS_HEAD="cohrt=$COHRT,pct=$PCT"
        ARGS_TAIL="yearout=$YEAROUT,NOTE=$NOTE,clmDTA=$CONVERT2STATA,winyear=$WINYR"

        ########################################################################
        ## STEP 1:: 1cohort_extraction : Extract and select candidates index events
        ########################################################################
        ## NOTE: ONLY RUN THE FILES WHICH YOU USE TO DEFINE THE INDEX EVENTS.
        #runcode 1cohort_extraction/1A_flag_candidate_evnt ip_candidates.sas "$ARGS_HEAD,yearin=$IPYEARIN,$ARGS_TAIL,ip_dxn=$IP_DXN,ip_proc=$IP_PROC,ip=$IP"
        runcode 1cohort_extraction/1A_flag_candidate_evnt medpar_candidates.sas "$ARGS_HEAD,yearin=$MEDYEARIN,$ARGS_TAIL,med_dxn=$MED_DXN,med_proc=$MED_PROC,med=$MED"
        #runcode 1cohort_extraction/1A_flag_candidate_evnt op_candidates.sas "$ARGS_HEAD,yearin=$OPYEARIN,$ARGS_TAIL,op_dxn=$OP_DXN,op_proc=$OP_PROC,op=$OP"
        #runcode 1cohort_extraction/1A_flag_candidate_evnt carrier_candidates.sas "$ARGS_HEAD,yearin=$CARYEARIN,$ARGS_TAIL,car_dxn=$CAR_DXN,car=$CAR"

        ########################################################################
        ## STEP 1B:: BRING IN DENOMINATOR FILE AND ENFORCE ENROLLMENT RESTRICTIONS
        ########################################################################
        ## THIS step generates the final list of index events used in the 1C scripts
        ## to extract a 1-year window of claims around the index event.
        RESTRICT_ARGS="$ARGS_HEAD,yearin=$MEDYEARIN,$ARGS_TAIL"
        RESTRICT_ARGS+=",ip_dxn=$IP_DXN,ip_proc=$IP_PROC,ip=$IP"
        RESTRICT_ARGS+=",med_dxn=$MED_DXN,med_proc=$MED_PROC,med=$MED"
        RESTRICT_ARGS+=",op_dxn=$OP_DXN,op_proc=$OP_PROC,op=$OP"
        RESTRICT_ARGS+=",car_dxn=$CAR_DXN,car=$CAR"
        RESTRICT_ARGS+=",subsquentDgnDayRest=$DAYS_BETWEEN_NEWINDEX_EVENT"
        RESTRICT_ARGS+=",FFSmonthpost=$FFSmonthpost,FFSmonthpre=$FFSmonthpre"
        RESTRICT_ARGS+=",HMOmonthpost=$HMOmonthpost,HMOmonthpre=$HMOmonthpre"
        runcode 1cohort_extraction 1B_impose_restrictions.sas "$RESTRICT_ARGS"

        ########################################################################
        ## STEP 1C:: EXTRACT CLAIMS ##--ONLY RUN WHAT YOUR DUA COVERS
        ########################################################################
        runcode 1cohort_extraction/1C_pull_claims medpar_clm_window.sas "$ARGS_HEAD,yearin=$MEDYEARIN,$ARGS_TAIL"

        ##if [[ "$p" != "100" ]]; then
        ##    runcode 1cohort_extraction/1C_pull_claims carrier_clm_window.sas "$ARGS_HEAD,yearin=$CARYEARIN,$ARGS_TAIL"
        ##    echo "test"
        ##fi

        ##runcode 1cohort_extraction/1C_pull_claims ip_clm_window.sas "$ARGS_HEAD,yearin=$IPYEARIN,$ARGS_TAIL"
        ##runcode 1cohort_extraction/1C_pull_claims op_clm_window.sas "$ARGS_HEAD,yearin=$OPYEARIN,$ARGS_TAIL"

        ## Per-loop completion notice (sent regardless of EMAILON, which only governs crash emails).
        echo "hi;  $COHRT $PCT $NOTE loop has finished running. cheers, $(uname -n)" \
            | mail -s "FINISHED $COHRT $PCT  RUN" "$(whoami)@nber.org"

    done
done

## All percent/cohort loops are done: send the final notification to <current user>@nber.org.
finish_msg="good day; I have finished running your programs. Enjoy! $(uname -n)"
echo "$finish_msg" | mail -s "FINISHED ALL RUNs" "$(whoami)@nber.org"

